

load("data/sample10K.RData")
library(dplyr)
d <- filter(d, year==2014)
save(d, file = "data/sample_2014.RData")
## Exact Replication: the original 50 questions ## 

#install.packages("lavaan")
#install.packages("doParallel","foreach")

########################################################
## CFA analysis: Searching for a better model 
########################################################
########################################################

### WARNING - this part takes extremely long processing time
### Results can be directly loaded from intermediate file (line 115)

rm(list=ls(all=TRUE)) 
library(lavaan)

## ## load data
load("data/sample_2014.RData")
names(d)
table(d$year)

## ## Grouping survey questions
questions <- paste("q", 1:50, sep = "")
## Pan&Xu
f1 <- c(1, 2, 3, 4, 5, 10, 12, 13, 14, 17) # political liberalism
f2 <- c(21, 25, 27, 29, 30, 37, 40) # free market
f3 <- c(32, 41, 42, 43, 46, 47, 48, 49, 50) # traditional values
f4 <- c(9, 11, 15, 16, 18, 19, 20) # nationalism
f5 <- c(24, 6, 7, 8, 44, 45) # individual freedom
f6 <- c(23, 26, 28, 35, 39) # national interest
f7 <- c(22, 31, 33, 34, 36, 38) # social justice
FSet <- list(f1, f2, f3, f4, f5, f6, f7)

## ## a function that generates lavaan model syntax from a list of nubmers
genModel <- function(model.list) {
  model <- ""
  for (j in 1:length(model.list)) {
    model <- paste(model,
                   paste('factor',j,' =~ ',
                         paste(paste('q', model.list[[j]], sep=""),
                               collapse = " + "), sep = ""),
                   "\n", sep = "") 
  }
  return(model)
}

## ## first, search for the best combination among the three base categories
PSet <- matrix(NA, 0, 7)
colnames(PSet) <- paste("assign",1:7, sep = "")

## ## 877 possibilities
c1 <- 1
for (c2 in 1:2) {
  for (c3 in 1:(max(c1,c2)+1)) {
    for (c4 in 1:(max(c1,c2,c3)+1)) {
      for (c5 in 1:(max(c1,c2,c3,c4)+1)) {
        for (c6 in 1:(max(c1,c2,c3,c4,c5)+1)) {
          for (c7 in 1:(max(c1,c2,c3,c4,c5,c6)+1)) {
            PSet <- rbind(PSet, c(c1, c2, c3, c4, c5, c6, c7))
          }
        }
      }
    } 
  }
}
results <- matrix(NA, dim(PSet)[1], 6)
colnames(results) <- c("converged","post.check","chisq", "cfi", "tli", "rmsea")

## ## parallel computing: THIS'S GONNA TAKE A WHILE
## ## jump to line 113 to see the results

library(doParallel)
library(foreach)
cl <- makeCluster(10)
registerDoParallel(cl)
system.time(
  results <- foreach (i=1:dim(PSet)[1], .combine = rbind, .packages = c("lavaan"),
                      .inorder = FALSE) %dopar% {
                        model.list <- list()
                        for (j in 1:7) {
                          if (j %in% PSet[i, ]) {
                            model.list <- c(model.list, list(unique(unlist(FSet[which(PSet[i, ] == j)]))))
                          }
                        }
                        suppressWarnings(fit <- cfa(genModel(model.list),
                                                    ordered = questions,
                                                    data=d[,questions],
                                                    std.lv = TRUE))
                        converged <- inspect(fit, c("converged"))
                        if (converged == TRUE) {
                          post.cov <- suppressWarnings(inspect(fit, c("post.check")))
                          out <- c("converged" = converged, "post.cov" = post.cov,
                                   fitMeasures(fit, c("chisq","cfi", "tli","rmsea"))) 
                        } else {
                          out <- c(0, rep(NA, 5))
                        }
                        return(out)
                      })
stopCluster(cl)
colnames(results) <- c("converged", "post.check","chisq", "cfi", "tli", "rmsea")

ndim <- apply(PSet[,1:7],1,max)
results.final <- cbind.data.frame(PSet, results, ndim)
results.final$id <- 1:dim(results.final)[1]
rownames(results.final) <- NULL
head(results.final)

save(results.final, file = "output/result_cfa_search_2014.RData")